Prerequisites

Load required packages

library(tidyverse)
library(ggplot2)
library(rtweet)
library(readr)

Dataset

Import processed data, which can be found here.

#read preprocessed data
wines <- read.csv(file = '../data/processed_data/wines.csv')

Get sample of dataset

#set seed value to birthday of Ricardo Rodriguez, American wrestler and ring announcer and Dr. Reinaldo (Rei) Sanchez-Arias
set.seed(19630217)

#set percentage to test with for simplicity
percentage <- 5
wine_sample<- sample_n(wines, percentage/100*nrow(wines))

Split Taster data into different Data Frame

tasters <- wines %>%
  select(taster_name, taster_twitter_handle) %>% unique()
tasters

Drop taster_twitter_handle in wines dataframe

wines <- wines %>%
  select(-taster_twitter_handle)
head(wines)

Add Rating Classification

Add following classification to wine dataset as found on the website:

Category Rating Description
Classic 98-100 The pinnacle of quality.
Superb 94-97 A great achievement.
Excellent 90-93 Highly recommended.
Very Good 87-89 Often good value; well recommended.
Good 83-86 Suitable for everyday consumption; often good value.
Acceptable 80-82 Can be employed in casual, less-critical circumstances
# function to add rating
rating_category <- function(points){
  if(points>=98){
    return("Classic")
  }
  else if (points>=94){
    return("Superb")
  }
  else if(points>=90){
    return("Excellent")
  }
  else if(points>=87){
    return("Very Good")
  }
  else if(points>=83){
    return("Good")
  }
  else{
    return("Acceptable")
  }
}

wines<- wines %>%
  rowwise() %>%
  mutate(rating_category = rating_category(points))
head(wines)

Explore the Data

EDA (correlation priceXpoints, with DataExplorer library? using (this)[https://datascienceplus.com/blazing-fast-eda-in-r-with-dataexplorer/])

wines %>% 
  ggplot() +
    geom_point(mapping = (aes(x = points, y = price)), na.rm = T)
wines %>%
    summarize(avg_price = mean(price, na.rm=TRUE), 
              sd_price = sd(price, na.rm=TRUE),
              lowest_price = min(price, na.rm=TRUE),
              highest_price = max(price,na.rm=TRUE))
wines %>%
    summarize(avg_points = mean(points, na.rm=TRUE), 
              sd_points = sd(points, na.rm=TRUE),
              lowest_points = min(points, na.rm=TRUE),
              highest_points = max(points,na.rm=TRUE))

Select the provinces based on points and Select the best province for wine based on the average points of the sample size.

#find the average number of points across the 1,000 samples

wine_per_province <- wine %>% 
  select(province, points) %>% 
  summarise(points = mean(points))
wine_per_province

#Find the best province for wine using the average points across the 1,000 samples #drop the descriptions or just select price? set points to max(points)

best_province <- wine_sample %>% 
  group_by(province, points) %>% 
  filter(points > 88.669)
best_province  

Rating distribution

Best wine, by variety

#wine_best_variety <- 
wines %>% 
  group_by(variety) %>% 
  summarise(mean_points = mean(points)) %>% 
  arrange(desc(mean_points)) 
  
user_price <- readline(prompt = "How much are you willing to spend on a bottle?")
user_price <- as.integer(user_price)

wines %>% 
  filter(price <= user_price) %>% 
  arrange(desc(points)) %>% 
  select(title, price, points)

Conclusion

LS0tCnRpdGxlOiAiRXhwbG9yaW5nIGFuZCBBbmFseWl6aW5nIFdpbmUgRW50aHVzaWFzdCBSZXZpZXdzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIFByZXJlcXVpc2l0ZXMKCkxvYWQgcmVxdWlyZWQgcGFja2FnZXMKYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocnR3ZWV0KQpsaWJyYXJ5KHJlYWRyKQpgYGAKCiMgRGF0YXNldAoKSW1wb3J0IHByb2Nlc3NlZCBkYXRhLCB3aGljaCBjYW4gYmUgZm91bmQgW2hlcmVdKGh0dHBzOi8vZ2l0aHViLmNvbS9DNHJieW4zbTRuL3dpbmVfcmV2aWV3c19kYXRhX2FuYWx5c2lzL2Jsb2IvbWFzdGVyL2RhdGEvcHJvY2Vzc2VkX2RhdGEvcHJlcHJvY2Vzc2luZy5ybWQpLgoKYGBge3J9CiNyZWFkIHByZXByb2Nlc3NlZCBkYXRhCndpbmVzIDwtIHJlYWQuY3N2KGZpbGUgPSAnLi4vZGF0YS9wcm9jZXNzZWRfZGF0YS93aW5lcy5jc3YnKQpgYGAKCkdldCBzYW1wbGUgb2YgZGF0YXNldApgYGB7cn0KI3NldCBzZWVkIHZhbHVlIHRvIGJpcnRoZGF5IG9mIFJpY2FyZG8gUm9kcmlndWV6LCBBbWVyaWNhbiB3cmVzdGxlciBhbmQgcmluZyBhbm5vdW5jZXIgYW5kIERyLiBSZWluYWxkbyAoUmVpKSBTYW5jaGV6LUFyaWFzCnNldC5zZWVkKDE5NjMwMjE3KQoKI3NldCBwZXJjZW50YWdlIHRvIHRlc3Qgd2l0aCBmb3Igc2ltcGxpY2l0eQpwZXJjZW50YWdlIDwtIDUKd2luZV9zYW1wbGU8LSBzYW1wbGVfbih3aW5lcywgcGVyY2VudGFnZS8xMDAqbnJvdyh3aW5lcykpCmBgYAoKIyMjIFNwbGl0IFRhc3RlciBkYXRhIGludG8gZGlmZmVyZW50IERhdGEgRnJhbWUKCmBgYHtyfQp0YXN0ZXJzIDwtIHdpbmVzICU+JQogIHNlbGVjdCh0YXN0ZXJfbmFtZSwgdGFzdGVyX3R3aXR0ZXJfaGFuZGxlKSAlPiUgdW5pcXVlKCkKdGFzdGVycwpgYGAKCkRyb3AgYHRhc3Rlcl90d2l0dGVyX2hhbmRsZWAgaW4gd2luZXMgZGF0YWZyYW1lCgpgYGB7cn0Kd2luZXMgPC0gd2luZXMgJT4lCiAgc2VsZWN0KC10YXN0ZXJfdHdpdHRlcl9oYW5kbGUpCmhlYWQod2luZXMpCmBgYAoKIyMjIEFkZCBSYXRpbmcgQ2xhc3NpZmljYXRpb24KCkFkZCBmb2xsb3dpbmcgY2xhc3NpZmljYXRpb24gdG8gd2luZSBkYXRhc2V0IGFzIGZvdW5kIG9uIHRoZSBbd2Vic2l0ZV0oaHR0cHM6Ly93d3cud2luZW1hZy5jb20vMjAxMC8wNC8wOS95b3UtYXNrZWQtaG93LWlzLWEtd2luZXMtc2NvcmUtZGV0ZXJtaW5lZC8pOgoKfENhdGVnb3J5ICB8IFJhdGluZyAgfCBEZXNjcmlwdGlvbiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfAp8LS0tLS0tLS0tLXwtLS0tLS0tLS18LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS18CnxDbGFzc2ljICAgfAk5OC0xMDAgfCBUaGUgcGlubmFjbGUgb2YgcXVhbGl0eS4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfAp8U3VwZXJiICAgIHwJOTQtOTcJIHwgQSBncmVhdCBhY2hpZXZlbWVudC4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHwKfEV4Y2VsbGVudCB8CTkwLTkzCSB8IEhpZ2hseSByZWNvbW1lbmRlZC4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB8CnxWZXJ5IEdvb2QgfCAgODctODkJIHwgT2Z0ZW4gZ29vZCB2YWx1ZTsgd2VsbCByZWNvbW1lbmRlZC4gICAgICAgICAgICAgICAgICAgIHwKfEdvb2QJICAgICB8ICA4My04NgkgfCBTdWl0YWJsZSBmb3IgZXZlcnlkYXkgY29uc3VtcHRpb247IG9mdGVuIGdvb2QgdmFsdWUuICAgfAp8QWNjZXB0YWJsZXwJODAtODIJIHwgQ2FuIGJlIGVtcGxveWVkIGluIGNhc3VhbCwgbGVzcy1jcml0aWNhbCBjaXJjdW1zdGFuY2VzIHwKCmBgYHtyfQojIGZ1bmN0aW9uIHRvIGFkZCByYXRpbmcKcmF0aW5nX2NhdGVnb3J5IDwtIGZ1bmN0aW9uKHBvaW50cyl7CiAgaWYocG9pbnRzPj05OCl7CiAgICByZXR1cm4oIkNsYXNzaWMiKQogIH0KICBlbHNlIGlmIChwb2ludHM+PTk0KXsKICAgIHJldHVybigiU3VwZXJiIikKICB9CiAgZWxzZSBpZihwb2ludHM+PTkwKXsKICAgIHJldHVybigiRXhjZWxsZW50IikKICB9CiAgZWxzZSBpZihwb2ludHM+PTg3KXsKICAgIHJldHVybigiVmVyeSBHb29kIikKICB9CiAgZWxzZSBpZihwb2ludHM+PTgzKXsKICAgIHJldHVybigiR29vZCIpCiAgfQogIGVsc2V7CiAgICByZXR1cm4oIkFjY2VwdGFibGUiKQogIH0KfQoKd2luZXM8LSB3aW5lcyAlPiUKICByb3d3aXNlKCkgJT4lCiAgbXV0YXRlKHJhdGluZ19jYXRlZ29yeSA9IHJhdGluZ19jYXRlZ29yeShwb2ludHMpKQpoZWFkKHdpbmVzKQpgYGAKCiMgRXhwbG9yZSB0aGUgRGF0YQoKRURBIChjb3JyZWxhdGlvbiBwcmljZVhwb2ludHMsIHdpdGggYGBgRGF0YUV4cGxvcmVyYGBgIGxpYnJhcnk/IHVzaW5nICh0aGlzKVtodHRwczovL2RhdGFzY2llbmNlcGx1cy5jb20vYmxhemluZy1mYXN0LWVkYS1pbi1yLXdpdGgtZGF0YWV4cGxvcmVyL10pCmBgYHtyfQp3aW5lcyAlPiUgCiAgZ2dwbG90KCkgKwogICAgZ2VvbV9wb2ludChtYXBwaW5nID0gKGFlcyh4ID0gcG9pbnRzLCB5ID0gcHJpY2UpKSwgbmEucm0gPSBUKQpgYGAKCmBgYHtyfQp3aW5lcyAlPiUKICAgIHN1bW1hcml6ZShhdmdfcHJpY2UgPSBtZWFuKHByaWNlLCBuYS5ybT1UUlVFKSwgCiAgICAgICAgICAgICAgc2RfcHJpY2UgPSBzZChwcmljZSwgbmEucm09VFJVRSksCiAgICAgICAgICAgICAgbG93ZXN0X3ByaWNlID0gbWluKHByaWNlLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICBoaWdoZXN0X3ByaWNlID0gbWF4KHByaWNlLG5hLnJtPVRSVUUpKQpgYGAKCmBgYHtyfQp3aW5lcyAlPiUKICAgIHN1bW1hcml6ZShhdmdfcG9pbnRzID0gbWVhbihwb2ludHMsIG5hLnJtPVRSVUUpLCAKICAgICAgICAgICAgICBzZF9wb2ludHMgPSBzZChwb2ludHMsIG5hLnJtPVRSVUUpLAogICAgICAgICAgICAgIGxvd2VzdF9wb2ludHMgPSBtaW4ocG9pbnRzLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICBoaWdoZXN0X3BvaW50cyA9IG1heChwb2ludHMsbmEucm09VFJVRSkpCmBgYAoKU2VsZWN0IHRoZSBwcm92aW5jZXMgYmFzZWQgb24gcG9pbnRzICBhbmQgU2VsZWN0IHRoZSBiZXN0IHByb3ZpbmNlIGZvciB3aW5lIGJhc2VkIG9uIHRoZSBhdmVyYWdlIHBvaW50cyBvZiB0aGUgc2FtcGxlIHNpemUuIAoKI2ZpbmQgdGhlIGF2ZXJhZ2UgbnVtYmVyIG9mIHBvaW50cyBhY3Jvc3MgdGhlIDEsMDAwIHNhbXBsZXMKYGBge3J9CndpbmVfcGVyX3Byb3ZpbmNlIDwtIHdpbmUgJT4lIAogIHNlbGVjdChwcm92aW5jZSwgcG9pbnRzKSAlPiUgCiAgc3VtbWFyaXNlKHBvaW50cyA9IG1lYW4ocG9pbnRzKSkKd2luZV9wZXJfcHJvdmluY2UKYGBgCgoKI0ZpbmQgdGhlIGJlc3QgcHJvdmluY2UgZm9yIHdpbmUgdXNpbmcgdGhlIGF2ZXJhZ2UgcG9pbnRzIGFjcm9zcyB0aGUgMSwwMDAgc2FtcGxlcwojZHJvcCB0aGUgZGVzY3JpcHRpb25zIG9yIGp1c3Qgc2VsZWN0IHByaWNlPyBzZXQgcG9pbnRzIHRvIG1heChwb2ludHMpCmBgYHtyfQpiZXN0X3Byb3ZpbmNlIDwtIHdpbmVfc2FtcGxlICU+JSAKICBncm91cF9ieShwcm92aW5jZSwgcG9pbnRzKSAlPiUgCiAgZmlsdGVyKHBvaW50cyA+IDg4LjY2OSkKYmVzdF9wcm92aW5jZSAgCmBgYAoKClJhdGluZyBkaXN0cmlidXRpb24KCmBgYHtyfQoKYGBgCgpCZXN0IHdpbmUsIGJ5IHZhcmlldHkKYGBge3J9CiN3aW5lX2Jlc3RfdmFyaWV0eSA8LSAKd2luZXMgJT4lIAogIGdyb3VwX2J5KHZhcmlldHkpICU+JSAKICBzdW1tYXJpc2UobWVhbl9wb2ludHMgPSBtZWFuKHBvaW50cykpICU+JSAKICBhcnJhbmdlKGRlc2MobWVhbl9wb2ludHMpKSAKICAKYGBgCgpgYGB7cn0KdXNlcl9wcmljZSA8LSByZWFkbGluZShwcm9tcHQgPSAiSG93IG11Y2ggYXJlIHlvdSB3aWxsaW5nIHRvIHNwZW5kIG9uIGEgYm90dGxlPyIpCnVzZXJfcHJpY2UgPC0gYXMuaW50ZWdlcih1c2VyX3ByaWNlKQoKd2luZXMgJT4lIAogIGZpbHRlcihwcmljZSA8PSB1c2VyX3ByaWNlKSAlPiUgCiAgYXJyYW5nZShkZXNjKHBvaW50cykpICU+JSAKICBzZWxlY3QodGl0bGUsIHByaWNlLCBwb2ludHMpCmBgYAoKCiMgQ29uY2x1c2lvbgo=